PERSISTENT ARCHIVES 
Inventors: Reagan W. Moore, et al. 
Howrey Docket No. 02737.0004.NPUS01 
68/118 



"OTN" => "OTN ^object_title_name", 

"OTT" => "OTT_title_type ", 

"OTY" => "OTY ^object_type", 

"RDD" => "RDD reiated_document_description", 

"RDG" => "RDG_related_documents", 

"RDL" => "RDL related_documentJdentifier_link", 

"RDR" => "RDR related_document_relationship_type", 

"RID" => "RID relatedJmage_description", 

"RIG" => "RiG_relatedJmages", 

"RIL" => "RIL relatedjmagejdentifierjink", 

"RIP" => "RIP relatedjmage_preferred", 

"RIR" => "RiR related_image_relationship_type", 

"RMD" => "RMD related_multimedia_description", 

"RMG" => "RMG_related_muitiinedia", 

"RML" => "RML re!ated_multimediajdentifierjink", 

"RMR" => "RMR related_multimedia_relationship_type", 

"RWD" => "RWD related_works_description", 

"RWG" => "RWG_related_works_of_art", 

"RWL" => "RWL related_worksJdentifierJink", 

"RWR" => "RWR_related_works_reIationship_type ", 

"STD" => "STD ^style_period_description", 

"STG" => "STG_style_period", 

"STT" => "STT stylej3eriod_terms ", 

"SUG" => "SUG_subject_matter", 

"SUI" => "SUI ^subject_matterJconography", 

"SUP" => "SUP ^subject_matter_preiconographic_description", 

"SUT" => "SUT ^subject_matterjndex_terms ", 

"XAM" => "XAM ^amico_mode", 

"XCC" => "XCC ^dc_creator_corporatename", 

"XCM" => "XCM ^amlco_format_colormetric", 

"XCN" => "XCN_dc_creator", 

"XCP" => "XCP dc_creator jiersonalname", 

"XCR" => "XCR ^dc_creator_role ", 

"XDA" => "XDA_dc_date", 

"XDC" => "XDC dc_contributor_corporatenanne", 

"XDE" => "XDE ^dc_description", 

"XDL" => "XDL metadata_delition_flag", 

"XDN" => "XDN_dc_contributor", 

"XDP" => "XDP ^dc_contributor_personalname", 

"XDR"'=> "XDR dc_contributor_role ", 

"XFG" => "XFG ^amico_fonnat_compression", 

"XFD" => "XFD ^amico_format_dimensions", 

"XFE" => "XFE amico_format_encoding", 

"XFF" => "XFF ^amico_format_f!lesize", 

"XFG" => "XFO_dc_format", 

"XFP" => "XFP ^amico_format_colorpalette". 

"XI D" => "XID dc_resourceJdentifier", 

"X.LY" => "XLY_metadataJibrary_year", 

"XMN" => "XMN amico_media_note", 

"XPR" => "XPR metadata_data_processing_note", 

"XPU" => "XPU_dc_publisher", 



FIGURE 35C 



PERSISTENT ARCHIVES 
Inventors: Reagan W. Moore, et al. 
Howrey Docket No. 02737.0004.NPUS01 
69/118 



"XRI" => "XRI dc_relationJdentifier", 

"XRS" => "XRS dc_rights", 

"XRT" => "XRT dc_resourcetype", 

"XRY" => "XRY ^dc_relation_type", 

"XTI" => "XTI_dc_title", 

"XVD" => "XVD amico_metadata_vaiidation_date", 

"XVV" => "XW amico data dictionary version" 

); 

sub name {    # choose between long and short tag names for output
    # Short form (active): the tag passed in is returned unchanged.
    my $short = $_[0];
    return $short;

    # Long form (disabled): to emit long names instead, replace the
    # return above with
    #     return $long_tag{$_[0]}
}

# %is_group: set of tags that are treated as groups.
# A tag is flagged when its long name in %long_tag contains no underscore.
# NOTE(review): most long names visible in this file do contain "_", so very
# few tags would be flagged as written -- confirm the pattern against the
# original script.
my %is_group = ();
for my $tag (keys %long_tag) {
    my $long = $long_tag{$tag};
    next if $long =~ /_/;
    $is_group{$tag} = 1;
}



# %group_members: for every group tag, a whitespace-separated list of the
# three-letter member tags that belong to it.  The list is parsed elsewhere
# with m/[A-Z]{3}/g, so every tag must be exactly three capital letters.
my %group_members =    # members of each group
(
    "CLG" => "CLS CLT",
    "CRG" => "CRQ CRT CRN CRC CDT CBD CBP CBQ CDD CDP CDQ CAD CAP CGN CRB CRR "
           . "CNO",
    "CXG" => "CXD CXP CXS CXT",
    "DCG" => "DCB DCD",
    "MEG" => "MCM MED MDV MDU MEQ",
    "OCG" => "OCT OCS OCE OCQ",
    "OMG" => "OMD OMT OMM OMS",
    "OOG" => "OON OOP OOA OOC",
    "ORG" => "ORS ORL",
    "OTG" => "OTN OTT",
    "RDG" => "RDD RDR RDL",
    "RIG" => "RIP RID RIR RIL",
    "RMG" => "RMD RMR RML",
    # This entry must end with a comma: a "." here concatenates the value
    # with the next key and shifts every following key/value pair.
    "RWG" => "RWD RWR RWL",
    "STG" => "STD STT",
    "SUG" => "SUP SUI SUT",
    "XCN" => "XCP XCC XCR",
    "XDN" => "XDP XDC XDR",
    "XFO" => "XFE XFP XCM XFD XFF XFC",
    "XRE" => "XRY XRI"
);



FIGURE 35D 



PERSISTENT ARCHIVES 
Inventors: Reagan W. Moore, et al. 
Howrey Docket No. 02737.0004.NPUS01 
70/118 



# %group: inverse of %group_members -- maps each member tag to the group tag
# that contains it.
my %group = ();    # inverse: returns the group of a member
while (my ($group, $memstr) = each %group_members) {
    # Every run of three capital letters in the member string is one tag.
    my @members = ($memstr =~ m/[A-Z]{3}/g);
    foreach (@members) {
        $group{$_} = $group;
    }
}



sub escXML {    # escape characters: EXTEND/FIX!
    # Return a copy of the argument with XML markup characters replaced by
    # entity references, so it can be printed as element content.
    #
    # Works on a private copy: the caller's value and the global $_ are left
    # untouched.  An undefined argument yields the empty string.
    my ($text) = @_;
    $text = "" unless defined $text;

    # "&" must be handled first; otherwise the "&" of the entities produced
    # below would itself be escaped again.
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;

    # NOTE(review): the listing shows six further substitutions at this
    # point, apparently for accented letters, but their operands were lost
    # (each reads as replacing a plain letter by itself, i.e. a no-op).
    # Restore them from the original script if such characters must be mapped.

    return $text;
}



sub output_fields {    # output all fields from current record
    # The argument list becomes the global @record, which output_group also
    # consumes from.  Fields are taken from the front one at a time; a call
    # to output_field may remove further fields (the members of a group), so
    # the loop always continues with whatever remains in @record.
    # Processing stops at the first undefined field or when @record is empty.
    @record = @_;

    while (defined(my $field = shift @record)) {    # get the next field
        &output_field($field);    # output it (and more if group!)
    }
}

sub output_field {    # output the given field PLUS follow-up group
                      # members!
    # A field is expected to be a three-letter tag followed by its data.
    #   - known group tag:  print the opening tag, indent, and hand over to
    #                       output_group, which prints the closing tag;
    #   - known simple tag: print <tag>escaped data</tag> on one line;
    #   - unknown tag or unparsable field: print an <am_ERROR> element.
    # Reads the globals %long_tag and %is_group and adjusts $tab.
    (my $field) = @_;

    if ($field =~ /([A-Z]{3})(.*)/) {    # is it a 3 letter tag + data?
        my $tag  = $1;
        my $data = $2;
        if ($long_tag{$tag}) {           # do we know this tag?
            if ($is_group{$tag}) {       # is it a group?
                print &ind, "<", &name($tag), ">\n";
                $tab += 2;
                &output_group($tag);     # and output closing tag!
            } else {                     # it's a simple recognized tag
                print &ind, "<", &name($tag), ">",
                      &escXML($data),
                      "</", &name($tag), ">\n";
            }
        } else {                         # don't know this tag!
            print "<am_ERROR type = \"unrecognized tag\">";
            print &ind, "<$tag>", &escXML($data), "</$tag>\n";
            print "</am_ERROR>\n";
        }
    } else {                             # don't understand that field!
        # Report the offending field itself; the global $_ does not hold it.
        print "<am_ERROR type = \"unrecognized field\">", &escXML($field);
        print "</am_ERROR>\n";
    }
}

sub output_group {    # output fields WHILE in same group
    # Consumes fields from the global @record for as long as they are
    # members of group $grp (according to %group), printing each one through
    # output_field.  The first field that belongs elsewhere ends the group:
    # the indentation is reduced, the closing tag is printed, and that field
    # is then output normally.
    #
    # The group is also closed when @record runs out, so a group at the end
    # of a record does not leave an unterminated element.  A field without a
    # three-letter tag is reported and the group continues.
    (my $grp) = @_;

    while (defined(my $field = shift @record)) {    # get the next field
        if ($field =~ /([A-Z]{3})/) {               # should be a tag
            my $member = $1;
            if (defined $group{$member} and $group{$member} eq $grp) {
                # still in the same group
                &output_field($field);
                next;                               # STAY in the same group
            }

            # LEAVE group!
            $tab -= 2;
            print &ind, "</", &name($grp), ">\n";   # closing tag for group
            &output_field($field);
            return;
        }

        # don't understand that field!
        print "<am_ERROR type = \"unrecognized field\">", &escXML($field);
        print "</am_ERROR>\n";
    }

    # empty @record => done; close the group that is still open
    $tab -= 2;
    print &ind, "</", &name($grp), ">\n";
}



sub get_record {    # read the next record and return first tag
    # Splits the current input line ($_) into the global @record and returns
    # the first three characters of the first field, or 0 when the record
    # has no fields.  Problems are reported on STDOUT.
    chomp;

    # Fields are separated by "}~"; End-Of-Record = "|\n".
    # NOTE(review): the separator is reconstructed from a damaged listing --
    # confirm against real input data.
    @record = split /\}~/;

    my $last = pop(@record);              # get EOR
    if (!defined $last or $last ne "|") { # ...and check
        my $shown = defined $last ? $last : "";
        print STDOUT "*** ERROR: unknown delimiter <$shown>\n";
    }

    if (@record) {                        # check if non-empty
        return substr($record[0], 0, 3);  # return the tag name
    }

    print STDOUT "*** WARNING: empty record\n";
    return 0;
}
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# MAIN 

# Main program: read records from the input, write object records (first tag
# "AID") to amico_objects.xml and media metadata records (first tag "XID")
# to amico_media.xml.  Diagnostics go to STDOUT.

$| = 1;    # flush STDOUT after every write

open(AM_OBJS, '>', "amico_objects.xml")
    or die "*** ERROR: cannot open amico_objects.xml: $!\n";
open(AM_MEDIA, '>', "amico_media.xml")
    or die "*** ERROR: cannot open amico_media.xml: $!\n";

print AM_OBJS <<EOF;
<am_objects>
EOF

print AM_MEDIA <<EOF;
<am_media>
EOF

# Alternative headers with a stylesheet reference (disabled):
# print AM_OBJS <<EOF;
# <?xml:stylesheet type="text/xsl" href="amico_objects.xsl"?>
# <am_objects>
#EOF

# print AM_MEDIA <<EOF;
# <?xml:stylesheet type="text/xsl" href="amico_media.xsl"?>
# <am_media>
#EOF

while (<>) {
    if (my $tag = &get_record) {    # get next record and tag name
        if ($tag eq "AID") {
            # select() makes the plain print calls in the output_* subs
            # write to the objects file.
            select(AM_OBJS);
            print &ind, "<am_object>\n";
            $tab += 2;
            &output_fields(@record);
            $tab -= 2;
            print &ind, "</am_object>\n";
        } elsif ($tag eq "XID") {
            select(AM_MEDIA);
            print &ind, "<am_media_metadata>\n";
            $tab += 2;
            &output_fields(@record);
            $tab -= 2;
            print &ind, "</am_media_metadata>\n";
        } else {
            print STDOUT "*** ERROR: unknown tag <$tag> in record: <@record>\n";
        }
    } else {
        print STDOUT "*** ERROR: get_record failed\n";
    }
}

print AM_OBJS "</am_objects>\n";
print AM_MEDIA "</am_media>\n";

# Buffered write errors only surface at close, so check it.
select(STDOUT);
close(AM_OBJS)  or die "*** ERROR: cannot close amico_objects.xml: $!\n";
close(AM_MEDIA) or die "*** ERROR: cannot close amico_media.xml: $!\n";
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# Perl Script to convert XML SLA version 

# into software independent OAV 

# representation, ready to be loaded 

# into a variety of engines : 

# - Prolog engine, or 

# - relational database engine, or 

# - XML database engine, or 

# - other 
# 

# SCRIPT devised by Richard Marciano & 

# Bertram Ludaescher & 

# Reagan Moore 

# August 20, 2000, copyright RiM + BL + ReM 
# 



# ! /usr/local/bin/perl 
#use strict; 

# 

®boa; %boatype_hash = () ; my $bill_count = 0; 
my $amdt_count = 0; my $con_res_count = 0 ,- 
my $j_res_count = 0 ; my $res_count = 0; 

©abstract; %a_value = (); 
®congressional_record; %cr_value = (); 
©cosponsors; %cs_value = () ; ®date_introduced; 

%di_value = (); ©digest; %d_value = (); 
®latest_status ; %ls_value = (); @status_actions; 

%sa_value = () ; ©of f icial_title ; %ot_value = {); 
©sponsor; %s_value = ( ) ; ®statement_of_purpose ; 

%sop_value = (); @submitted_by; %sb_value = (); 
®submitted_for; %sf_value = () ; ®f ilename_senator ; 
@f ilename_period; ®prepared_by ; ©senator; 
®occurrence_section; ®occurrence_committee ; 

®topic_index; %ti_value= (); 



my $MORE_SIZE = 20; 

my $bill_index_2 = 
my %tempH = { ) ; 

my $line; ray $hl; my $h2 = ""; my $h3 ; my $h4; my $hS ; my $h6_l = ""; my $h6_2 
my $prev_h2 = " " ; 

my $section = 0; my $committee = ""; 

my $senNAME = ""; my $state = ""; my $senid; 

my $filename; 
my $line_number ; 
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my ©allfile; 

opendir THISDIR , "XMLDATA" or die "can't find DIRECTORY: $!"; 
®allfile = grep /_LAR/ , readdir THISDIR; 
closedir THISDIR; 

open( LOG, ">logfile" ) | | die "*ERROR: can't open logfile\n" ; 



foreach my $sen (@allfile) { 



#my $sen = "D_$ARGV [ 1] _LAR$ARGV [ 0] _106 . xml " 
#my $sen = "D_1CP_LAR_S106_106 .xml" ; 
#my $sen = "D_1CP_LAR_S216_106 .xml" ; 
#my $sen = "D_1_LARI_S272_106 .xml " ; 

$sen =- m/. + \_.+\_.+\_S(\d+)\_.4./; 
$senid = $1; 
$senid = $1; 
$senNAME = " " ; 



open( SEN106, " XMLDATA/ $ sen" ) | | die "*ERROR: can't open $sen\i 
#open( SEN106, "$sen" ) || die "*ERROR: can't open $sen\n" ; 

&process_header ( $sen ) ; 

Nl: while ( $line = <SEN106> ) { 
$ 1 ine_number + + ; 

N2 : ; 

# DETECT SECTION headers 

if ( $line =- m/hidden="on">(.+)<\/string>/ ) { 
$hl = $1; 

# <p align="left" bold= "on" xstring bold= "on" >SECTION I. SPONSORED 
MEASURES</string></p> 

if ( $hl =- m/SECTION I\./ ) { 

$section = 1; $h6_l = " " ; $h6_2 = " " ; 

my $ov = $senid . "_" . " $line_number" ; 

ray $ien = $#occurrence_section; 

$occurrence_section[ $len + 1 ] = [ ($senid, $line_numb« 
$section, $hl) ] ; 
} 

elsif { $hl =- m/SECTION II\./ ) { 

$ sect ion = 2; $h6_l = ""; $h6_2 = ""; 

my $ov = $senid . "_" . " $line_number" ; 
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elsif ( $hl =- m/ SECTION IV\ . / ) { 
$section = 4 ; 



my $ov = $senid . "_" . "$line_nuirber" ; 



my $len = $#occurrence_section; 

$occurrence_section[ $len + 1 ] = [ ($senid, $line_number, 
$section, ] ; 

} 

elsif { $hl =- m/SECTlON V\ . / ) { 

$section = 5; $h6_l = " " ; $h6_2 = " " ; 

my $ov = $senid . "_" . " $line_number " ; 

my $len = $#occurrence_section; 

$occurrence_section[ $len + 1 ] = [ ($senid, $ 1 ine_iiumber , 
$section, $hl) ] ; 
} 

elsif ( $h.l =- m/SECTION VI\ . / ) { 

$section = S; $he_l = " " ; $h6_2 = " " ; 



my $ov = $senid . "_" . " $line_number" ; 



my $len = $#occurrence_section; 

$occurrence_section[ $leii + 1 ] = [ ($senid, $ 1 ine_number , 
$section, $hl) ]; 
} 

elsif ( $hl =- m/SECTIOK VII\./ ) { 

$section = 7; $h6_l = " " ; $h6_2 = " " ; 

my $ov = $senid . . " $line_nuinber" ; 



my $len = $#occurrerLce_section; 

$occurrence_section[ $len + 1 ] = [ ($senid, $line_number , 
$section, $hl) ]; 



S:process_index; 

} 

else { 

print LOG "Ml error: COULD NOT RECOGNIZE SECTION NUMBER ! ! ! 

) 



# DETECT **** BILL NUMBERS 

elsif ( $line =- m/>\*\*\*\* (.+)<\/p>/ ) { 
$h2 = $1; 

$h2 =- s/\s*//g; 



if ( $prev_h2 ne ) { 

# S.I23 1 dot 

# S.Amdt.l23 2 dots 
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$boatype_hash{ bill }{ $bill }++; 

} 

} 

else { 

if( $list[l] eq "Amdt" ) { 
$aind.t_count + + ; 

my $bill; 

foreach $bill ( sort keys %{$tempH{ $prev_h2 } } ) { 
if ( $boatype_hash{ amdt }{ $bill } eq ) { 
$boatype_hash{ amdt }{ $bill } = O; 

} 

$boatype_hash{ amdt }{ $bill }++; 

} 

} 

elsif( $list[l] eq "Con" ) { 
$con_res_count++ ; 

foreach my $bill { sort keys % { $tempH { $prev_h2 } } ) ( 
if ( $boatype_hash{ con_res }{ $bill } eq " " ) { 
$boatype_hash{ con_res }{ $bill } = 0; 

} 

$boatype_hash{ con_res }{ $bill }++; 

} 

} 

elsif( $list[l] eq "J" ) { 
$j_res_count++; 

foreach my $bill ( sort keys %{ $tempH{ $prev_h2 } } ) { 
if ( $boatype_hash{ j_res }{ $bill } eq " " ) { 
$boatype_hash{ j_res }{ $bill } = 0; 

} 

$boatype_hash{ j_res }{ $bill }++; 

} 

} 

elsif( $list[l] eq "Res" ) { 
$res_count++ ; 

foreach my $bill ( sort keys %{$tempH{$prev_h2} } ) { 
if ( $boatype_hash{ res }{ $bill } eq "" ) { 
$boatype_hash( res }{ $bill } = 0; 

} 

$boatype_hash{ res }{ $bill }++; 

} 

} 

else { 

print "$prev_h2\tERR0R in recording boatype hash\n"; 

} 

} 

$list = ""; 

} 

$prev_h2 = $h2 ; 
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# SKIP OVER header SECTIONS 
#<header> 

#<p align="center" bold="on" italic="of f " ><f ieldxf Idinst> PAGE 

</f Idinstxf Idrsltxstring charstyname = " " bold="on" italic="of f ">2</string> 

# </fldrslt></f ield></p> 

#cp align="right" bold="on" italic="of f "xstring bold="on" italic="off ">Paul 
S. Sarbanes</string></p> 

#<p align="left" bold="on" italic="off "xstring bold="on" italic="of f ">SECTION 
IV. COSPONSORED MEASURES</string></p> 

#<p align="left" bold="on" italic="of f'xstring bold="on" 

italic="off ">&:tab;&tab; ORGANIZED BY COMMITTEE REFERRAL</string></p> 
#<p align="left" bold="on" italic="of f'xstring bold="on" 
italic="off ">S;tab;&:tab; SENATE: AGRICULTURE</string></p> 
#<p align="left" bold="off" italic="of f "x/p> 
#</header> 

elsif ( $line =- m/<header/ ) { 

while ( $line = <SEN106> ) { 
$ 1 ine_number++ ; 

if ( $line =- m/<p align= .+>(.+) <\/p>/ ) { 
$h4 = $1; 
$i++; 

if ( $i == 3 && ( $section == 2 | | $section == 3 ] | $section 

== 4 ) ) { 

$h4 =~ m/SECTION (.+)\. .+<\/string>/; 
$h5 = $1; 

if ( $h5 eq "III" ) { 
$committee = 3 ,- 

} 

elsif ( $h5 eq "IV" ) { 
$comniittee = 4 ,- 

} 

else { _ 

} 

} 

elsif ( $i == 5 && ( $committee == 3 | | $comniittee == 4 ) ) { 
$h4 =- m/SECTION (.+)\. . +< \/ string>/ ; 
$h4 =- m/Sitab; &tab;\s+ ( . +) <\/string>/; 
$h6_l = "COMMITTEE"; 
$h6_2 = "$1"; 

my $ov = $senid . "_" . "$line_number" ; 

# $occurrence_value{ $ov } = " " ; 

my $len = $#occurrence_committee; 
$occurrence_committee C $len + 1 ] = [ ($senid, 
$line_number , $committee, $h6_2) ] ; 
} 

} 
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my $ov = $senid . "_" . "$line_number" ; 

$di_value{ $val } = " " ; 

ray $len = $#date_introduced; 

$date_introduced[ $len + 1 ] = [ {$senid, $line_nuinber, $val) ] ; 



if ( $section == 1 | | $section == 3 ) 
$tempH{$h2} {SPONSOR} = "$senid"; 



my $s = $senid; 
$s =- s/ /_/g; 
$s_value{ $s } = ""; 

my $len = $#sponsor; 

$sponsor[ $len + 1 ] = [ {$senid, $s) ] ,- 

} 

elsif ( $section == 2 | | $section == 4 ) { 
$line = <SEN106>; 
$ 1 ine_number++ ; 

<p bold="off" italic="off ">SPONSOR: Daschle</p> 
if ( $line =- m/<p .+>(.+): (.+)<\/p>/ ) { 
my $mysponsor = $1; 
my $value = $2; 
$tempH{$h2}{ SPONSOR} = $value ; 



my $ov = $senid . "_" . " $line_number" ; 

my $s = $value ; 
$s =- s/ /_/g; 
$s_value{ $s } = " " ; 

my $len = S#sponsor,- 

$sponsor[ $len + 1 ] = [ ($senid, $line_ 



elsif ( $section == 5 ) { 

<p>SUBMITTED FOR: S. 4 Sctab; CONGRESSIONAL RECORD: S1830</p> 

$line = <SEN106>,- 
$line_number++ 

if ( $line =- m/<p>(.4-)<\/p>/ ) { 
my $submit = $1; 
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my $ov = $senid . "_" . 

my $s = $senid; 
$s =- s/ /_/g; 
$s_value{ $s } = " " ,- 

my $lerL = $#sponsor,- 

$sponsor[ $len + 1 ] = [ ($senid, $s) ]; 

# 

$line = <SEN106>; 
$line_nuraber++ ; 

if ( $line =- in/<p>(. + ): (.+)<\/p>/ ) { 
my $submitted_by = $1; 
my $value = $2; 

$tempH{$h2} {SUBMITTED_BY} = $value; 



my $ov = $seiiid . "_" . " $line_number" ; 

my $sb = $value ; 
$sb =- s/ /_/g; 
$sb_value{ $sb } = ""; 

my $len = $#submitted_by ; 

$siabmitted_by [ $len + 1 ] = [ ($senid, $line_number , $sb) 

] ; 

# 

} 

elsif ( $line =- m/<p align= .+>(.+) <\/p>/ ) { 
$h3 = $1; 
goto N4; 

} 

} 

elsif ( $section == 6) { 

# <p>SUBMITTED FOR: S. 4 &tab; CONGRESSIONAL RECORD: S1830</p> 

# <p>SPONSOR: Murray</p> 

# <p>SUBMITTED BY: Bingaman</p> 
$line = <SEN106>,- 

$1 ine_number++ ; 

if ( $line =- m/<p>(.+)<\/p>/ ) { 
my $siibmit = $1; 
. my ($partl, $part2) = split (/&tab; / , $submit) ; 
my ($partl_l, $partl_2) = split(/: /, Spartl) ; 
my ($part2_l, $part2_2) = split (/: /, $part2) ; 

$partl_2 =- s/\s*//g; 

$tempH{$h2} {SUBMITTED_FOR} = $partl_2; 
# 
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$tempH{$h2}{C0NGRESSI0NAL_REC0RD} = $part2_2 ; 



$ov = $senid . "_" . " $line_number" ; 

my $cr = $part2_2 ; 
$cr =- s/ /_/g; 
$cr_value{ $cr } = " " ; 

$len = $#congressional_record; 
## Scongressional_record[ $len + 1 ] = [ (Ssenic 

$line_nuinber, $cr, Sh2 ) ] ; 

$congressional_recordt $len + 1 ] = [ ($senid, 
$line_nuitiber, $cr) ] ,- 



$line = <SEN106>; 
$ 1 ine_number + + ; 

if ( $line =- ni/<p>(. + ): (.+)<\/p>/ ) 
my $mysponsor = $1; 
my $value = $2; 

$tempH{$h2} {SPONSOR} = $value,- 



my $ov = $senid . "_" . " $line_number" ; 

my $s = $value; 
$s =- s/ /_/g; 
$s_value{ $s } = ""; 

my $len = $#sponsor; 

$sponsor[ $len + 1 ] = [ ($senid, $line_number 



} 

$line = <SEail06>; 
$1 ine_number++ ; 

if { $line =- m/<p>(.+): (.+)<\/p>/ ) 
my $submitted_by = $1; 
my $value = $2; 

$tempH{$h2} {SUBMITTED_BY} = $value 



"$line_number" ,- 



my $sb = $value; 
$sb =- s/ /_/g; 
$sb_value{ $sb } = ""; 

my $len = $#subraitted_by; 
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N4:if ( $h3 eg "COSPONSORS" ) { 
# <p bold="off" italic="of f ">Edwards; Bayh; Kerry; Bingaman (A- 

11/05/1999) :</p> 

$line = <SEN106>; 

$ lirLe_nuraber++ ; 

if ( $line =- m/<p .+>(.+) <\/p>/ ) { 
my $mycosponsors = $1; 

if ( ($mycosponsors ne "") && ( $mycosponsors ne "NONE") ) • 
$tempH{$h2} {COSPONSORS} = $mycosponsors ,- 



my $ov = $senid . "_" . " $line_nutnber" ; 

my $cs = $mycosponsors; 
Scs =- s/ /_/g; 
$cs_value{ $cs } = ""; 

my $len = $#cosponsors ; 

$cosponsors[ $len + 1 ] = [ ($senid, $line_nuTnber, $cs) ]; 



} 

else { 

$teinpH{$h2} {COSPONSORS} = "NONE", 



my $ov = $senid . . "$line_nuTnber" ; 

my $cs = $mycosponsors,- 

$cs =- s/ /_/g; 

$cs value{ $cs } = ""; 

my $len = S#cosponsors; 

$cosponsors [ $len + 1 ] = [ ($senid, $line_nuraber, $cs) ]; 



} 

} 

else { 

print LOG "Ml error : $senid : $h2 : $h3 COSPONSORS tag ! I ! ! \n" ,- 

} 



elsif ( $h3 eq "OFFICIAL TITLE" ) { 

<p bold="off" italic="off "> </p> 

$line = <SEN10S>; 
$ 1 ine_number++ ; 

if ( $line =- m/<p .+>(.+) <\/p>/ ) { 

my $title = $1; 

$tempH{$h2}{0FFrCIAL_TITLE} = $title; 



my $ov = $senid . "_" . " $line_numi>er" ; 
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elsif ( $h3 eq "LATEST STATUS" | | $h3 eq "STATUS ACTIONS" ) { 

# <p bold="off "xstring bold="on" >Oct 25, 1 99 9&tab; Became Public 
Law No: 106-80 . </string></p> 

# <p><string>May 27, 1999&tab; Proposed amendment S.Amdt. 387 
withdrawn in Senate . </string></p> 

# <p><string>May 27, 1999&tab; Proposed by Senator Levin for Senator 
Sarbanes . </string></p> 

# <p align=" center" italic="of f ">ABSTRACT</p> 
my $cumulative_content = " " ; 

my $i = 0; 

my $save_line_number ; 
while ( $line = <SEN106> ) { 
$1 ine_number++- ; 

if { $i == 0 ) { $save_line_number = $line_number ; } 
$i++; 

if ( $line =- m/<p.*><string.*>(.+)<\/string><\/p>/ ) { 
my $content = $1; 

$cumulative_content .= "$content CCCRRR " ; # replace \n 

with " CCCRRR " 

} 

else { 

if { $h3 eq "LATEST STATUS" ) { 

$tempH{$h2}{LATEST_STATUS} = $cumulati 

} 

elsif ( $h3 eq "STATUS ACTIONS" ) { 

$tempH{$h2}{STATUS_ACTIONS} = $cumulative_ 



my $ov = $senid . "_" . "$line_nutnber" ; 

if ( $h3 eq "LATEST STATUS" ) { 

my $status = $cumulative_content ,- 
$status =- s/ /_/g; 
$ls_value{ $ status } = ""; 

my $len = $#latest_status ; 

$latest_status [ $len + 1 ] = [ ($senid, $line_number , 

} 

elsif ( $h3 eq "STATUS ACTIONS" ) { 
my $status = $cumulative_content ; 
$status =- s/ /_/g; 
$sa_value{ $status } = 

my $len = $#status_actions; 

$status_actions [ $len + 1 ] = [ ($senid, $line_number , 

} 

goto N2; 
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} 

elsif ( $h3 eq "ABSTRACT" ) { 
<p italic="off ">NONE</p> 
$line = <SEN106>; 
$line_number++ ; 

if ( $line =- m/<p.*>(.+)<\/p>/ ) { 
my $abstract = $1; 

$tenipH{$h2} {ABSTRACT} = $abstract; 



my $ov = $senid . "_" . " $line_number" ; 

my $a = $abstract ; 
$a =- s/ /_/g; 
$a_value { $a } = " " ; 

my $len = $#abstract; 

$abstract [ $len + 1 ] = [ ($senid, $line_number , $a) ] ; 



} 



le { 

print LOG "!!! error : $senid: $h2 : $h3 ABSTRACT tag 



elsif ( $h3 eq "STATEMENT OF PURPOSE" ) { 

<p align="center" italic="off ">STATEMENT OF PURPOSE</p> 

<p italic="of f ">. . ,</p> 
OR 

<p align="center" italic="of f ">ABSTRACT</p> 



my $ov = $senid . "_" . " $line_number" 



if { $line =- m/<p italic= .+>(.*> <\/p>/ ) { 
my $stmt = $1; 

if ( $stmt eq "" ) { 

$tempH{$h2) {STATEMENT_0F_PURP0SE} = $stmt; 

} 

else { 

$tempH{$h2}{STATEMENT_0F_PURPOSE} = $stmt; 



my $sop = $stmt; 
$sop =- s/ /_/g; 
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my $sop = " " ; 
$sop_value{ $sop } = " " ; 

my $len = $#statement_of_purpose ; 

$statement_of _purpose [ $len + 1 ] = [ ($seiiid, $line_nuTnber , 

$sop) ] ; 



goto N2; 

} 

else { 

print LOG "!!! error : $senid: $h2 : $h3 STATEMENT OF PURPOSE tag 

} 



lif ( $h3 eq "DIGEST" ) { 
<p italic="of f ">NONE</p> 
$line = <SEN106>; 
$line_number++ ; 

if ( $line =- m/<p .+>(.+) <\/p>/ ) { 
my $inydigest = $1; 
$teTnpH{$h2} {digest} = $mydigest; 



my $ov = $senid . " 

my $d = $mydigest; 
$d =- s/ /_/g; 
$d_value{ $d } = 

my $len = $#digest; 
$digest [ $len + 1 ] 



" $ 1 ine_nuinbe r " 



[ ($senid, $1±: 



!_number , $d) ] ; 



# END WHILE 



■:$senid:$h2:$h3 DIGEST tag 



■:$senid:$h2:$h3 UNKNOWN tag !M!\n" 



THEEND : ; 

$prev_h2 = $h2 ; 



} # comment out foreach loop 



my $buff = 
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iinputstr eq "q" 

■ ( $ input str eq 
( $ input str eq 
( $ input str eq 
( $ input str eq 
( $ input str eq 
( $ input str eq 
( $ input str eq 
{ $ input str eq 
( $ input str eq 
( $ input str eq 
{ $inputstr eq 
{ $inputstr eq 
( $inputstr ea 
( $ input s 

:( $ inputs 

■ ( $inputstr eq 
: ( $inputstr eq 
: ( $inputstr eq 
: ( $inputstr eq 
: ( $inputstr eq 

{ $inputstr eq 
$inputstr eq 

( $inputstr eq 

( $inputstr eq 

( $inputstr eq 

( $inputstr eq 

( $inputstr eq 



elsif ( $inputstr eq ' 

elsif ( $inputstr eq ' 

elsif { $inputstr eq ' 

elsif { $inputstr eq ' 

elsif ( $inputs 

elsif ( $inputs 



if{ $: 
els 
elsif 
elsif 
elsif 
elsif 
elsif 



elsif 
elsif 
elsif 
elsif 
elsif 
elsif 
els: 
els: 



elsif 
elsif 
elsif 
elsif 
elsif 
elsif 
elsif 
elsif 



'ha" ) 
'cr- ) 
'her" ) 

'di" ) 
'hdi" ) 



'ot" ) 
■hot" ) 

'hs" ) 
'sop" ) 
'hsop" ) 
'sb" ) 
'hsb" ) 
'sf" ) 
'hsf" ) 
'ti" ) 



'hti' 



iff $inputstr eq 
ie { print "\t\t*' 



{ goto MYEND; 

{ &print_table 

{ &print_table 

{ &print_hash 

{ &print_table 

{ &print_hash 

{ &print_table 

{ &print_hash 

{ &print_table 

{ &print_hash 

{ S:print_table 

{ &print_hash 

{ &print_table 

( &print_hash 

{ &print_table 

{ &print_hash 

{ &print_table 

{ &print_hash 

{ &;print_table 

{ &print_hash 

{ &print_table 

{ Scprint_hash 

{ &print_table 

{ &print_hash 

{ &print_table 

{ 5cprint_hash 

{ &print_table 

{ Scprint_hash ( 

{ &print_table 

{ SEprint_table 

{ &print_table 

{ &print_table 

{ &print_table 

{ S:print_table 



"boa" ) ; } 
"abstract" ) ,- } 
"a_value" ); } 
" congress ional_record" 
"cr_value" ) ,- } 
"cosponsors" ) ; } 
"cs_value" ) ; } 
"date_introduced" ) ; } 
"di_value" ) } 
"digest" ) ; } 



); 

alue" ) ; } 



); } 



) ; 



offi< 



lue" ) ; } 
ial_title" ) ; 
ot_value" ) ; } 
sponsor" ) ; } 
s_value" ) ; } 
statement_of__purpoi 
sop_value" ) ; } 
submit ted_by" ) ; } 
sb_value" ) ,- } 
subtnitted_for" ) ; 
sf_value" ) ; } 
topic_index" ) ; } 
ti value" ) ; } 



); } 

); } 

"f ilename_period" ) ; } 
"prepared_by" ) ; } 
"occurrence_section" ) ; } 
"occurrence_committee" ) ; 



' WRONG OPTION ■> 



S:pretty_print_tables ( "106" ) ; 



*\n", 



S:print_prompt ; 
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print 


"\t\"d\". 


digest 




\ 


'hd\" . 


d_value\n" ; 


print 


••\t\"ls\" 






\ 


'hls\" 


ls_value\n" ; 


print 


"\t\"sa\" 


status~actions 




\ 




sa_value\n" ; 


print 


"\t\"ot\" 


official title 




\ 


'hot\" 


ot_value\n" 


print 


"Vt\"s\". 






\ 


'hs\" . 


s_value\n" ; 




"\t\" sop\ 


. statement_of_purpose . 




\ 




. sop_value\n" ; 


print 


"\t\"sb\" 


submitted by 




\ 


•hsb\" 


sb_value\n" ; 


print 


"\t\"sf\" 


submitted for 




\ 


■hsf\" 


sf value\n"; 


print 


"\t\"ti\" 


topic index 




\ 


•hti\" 


ti_value\n" ,- 


print 


"\t\n"; 












print 


"\t\"l\". 


f ilename_senator 


\ 


2\ 




itor\n" ; 


print 


"\t\"2\". 


f ilename_period 


\ 


4\ 


• . prepared_by\n" ; 




"\t\"S\". 


occurrence_section 


\ 


6\ 


' . occurrence_committee\ 


print 


"\t\n"; 












print 


"\t\"pp\" 


pretty print (Prolog) -- 


-\n" ; 












*\i 









sub pretty_print_tables { 
my $sid = $_[0] ; 

my $DIR = "Prolog/sen_$senid" ; 
mkdir $DIR, 0755; 

open( PP, ">$DIR/boa.P" ) | | die "*ERROR: can't open\n" ; 
&pretty_print ( "boa" ) ; 
close PP; 

open( PP, " >$DIR/ abstract . P" ) || die "*ERROR: can't open\n" ; 
&pretty_print ( "abstract" ); 
close PP; 

open{ PP, ">$DIR/congressional_record. P" ) || die "*ERROR: can't open\n 
Stpretty_print ( "congressional_record" ); 
close PP; 

open{ PP, ">$DIR/cosponsors.P" ) || die "*ERROR: can't open\n" ; 
&pretty_print ( "cosponsors" ); 
close PP; 

open( PP, ">$DIR/date_introduced.P" ) || die "*ERROR: can't open\n" ; 
&pretty_print ( "date_introduced" ); 
close PP; 

Dpen( PP, ">$DIR/digest.P" ) | | die "*ERROR: can't open\n" ; 
&pretty_print: ( "digest" ); 

open{ PP, ">$DIR/latest_status.P" ) || die "*ERROR: can't open\n" ; 
&pretty_print ( "latest_status" ); 
close PP; 

open( PP, ">$DIR/status_actions.P" ) |] die "*ERROR: can't open\n" ; 
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open( PP, ">$DIR/statement_of_purpose . P" ) || die "*ERROR: can't open\n 
&prettY_print ( "stateraent_of_purpose" ); 
close PP; 

open( PP, ">$DIR/submitted_by .P" ) | | die "*ERROR: can't open\n" ,- 
&pretty_print ( " submit ted_by" ); 
close PP; 

open( PP, ">$DIR/submitted_for.P" ) || die "*ERROR: can't open\n" ; 
&pretty_print ( "submitted_f or " ); 
close PP; 

open( PP, ">$DIR/topic_index.P" ) | | die "*ERROR: can't open\n" ; 
Sipretty_print ( "topic_index" ); 
close PP; 

open( PP, ">$DIR/f ilename_senator.P" ) | | die "*ERROR: can't open\n" ; 
&pretty_print ( "f ilename_senator" ); 
close PP; 

open( PP, ">$DIR/filename_period.P" ) | | die "*ERROR: can't open\n" ; 
&pretty_print ( "f ilenaine_period" ); 
close PP; 

open( PP, ">$DIR/senator.P" ) || die "*ERROR: can't open\n" ; 
&;pretty_print ( "senator" ); 
close PP; 

open{ PP, ">$DIR/prepared_by.P" ) | | die "*ERROR: can't open\n" ; 
&pretty_print ( "prepared_by" ) ; 
close PP; 

open( PP, ">$DIR/occurrence_section.P" ) | | die "*ERROR: can't open\n" 
S:pretty_print ( "occurrence_section" ); 
close PP; 

open( PP, " >$DIR/occurrence_committee , P" ) || die "*ERROR: can't open\n 
S;pretty_print ( "occurrence_comTnittee" ); 
close PP; 



sub pretty_j3rint { 

my $arg_table = $_[0] ; 

# digest ( ' quote* strings ' , . . , . . , . . ) . 

my $buff = ""; 
my ®arr = " " ; 
my $inputstr = ""; 

no strict; 
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else { 

print PP "\ ' $aref-> [$j] \ ' , 

} 

} 

$aref->[$n] =- s/_/ /g; 

if( $arg_table eq "'latest_status" || $arg_table eg "status_actions" ) 

$aref->[$n] =- s/ CCCRRR /\n/g; 

my olist = split (/\n/, $aref -> [$n] ) ; 

my $len = $#list; 

my $newstr = " [" ; 

foreach my $i (0 . . $len ) { 

my ($ls_date, $ls_mesg) = spl it ( /&tab; / , $list[$i]); 

$newstr .= "d ( \ ' $ls_date\ ' , " ; 

$newstr .= " \ ' $ls_mesg\ ' ) " ; 

if( $len > 0 && $i < $len ) { 

} - . ' 

} 

$newstr .= "] " ; 
$aref->[$n] = $newstr; 
print PP "$aref-> [$n] " ; 
print PP " ) \ . \n" ; 



elsif( $arg_table eq "cosponsors" ) { 
#Dodd; Bryan; Leahy; Edwards; Hollings; Breaux (A- 02/08/2000 ) : 

if ( $aref->[$n] =- m/.+:/ ) { 
chop $aref->[$n]; 

} 

my ©colist = split (/;/, $aref -> [$n] ) ; 
my $newstr = " [" ; 
foreach my $item (®colist) { 
my $items = ""; 

$item =- m/\s*(.+)/ && ($items = $1); 
my $co_name = 
my $co_amend = " " ; 
# Mikulski (A-ll/08/1999> 

if ( $items =- m/(.+) \{(.+)\)/ ) { 

$co_name = $1; 

$co_araend = $2 ; 

Snewstr . = "d ( \ ' $co_name\ ' , \ ' $co amend\ ' ) , 

} 

^Ise { 

my $cosponsor_val = $items; 
$newstr .= " \ ' $cosponsQr_val\ ' , "; 

} 

} 

chop $newstr; 
chop $newstr; 
$newstr . = " ] " ; 
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sub print_bills { 
my $bill; 
my $field; 

foreach $bill ( sort keys %HoH ) { 
print "<bill name=\"$bill\">\n" ; 
my $flag_NONE = "false" ; 

foreach $field ( sort keys %{ $HoH{$bill} } ) { 
if( $field eq "DATE_INTRODUCED" ) { 
print " 

<date_introduced>$HoH{$bill} {$f ield}</date_introduced>\n"; 
} 

# 

elsif( $field eq "SPONSOR" ) { 

my $sponsor_val = $HoH{ $bill } { $f ield} ; 
my $val = " " ; 
my $print_f ield; 

$sponsor_val =- m/*(\d+)/ && ($val = $1) 
if { $val ne •"' ) { # then it ' s a number 
my $key = SsenNUMHash{ $val } ; 

Sprint_field = $keypeopleHash{ $key }{ nameURI } ; # use of 
uninitialized value! ! ! 

print " <sponsor>$print_f ield</sponsor>\n" ; 

} 

else { 

$sponsor_val =- s/ //g; 
$sponsor_val = Ic $sponsor_vaL 

if ( exists! $keypeopleHash{ $sponsor_val } ) ) { 

$print_field = $keypeopleHash{ $sponsor_val }{ nameURI 

}; 

print " <sponsor>$print_f ield</sponsor>\n" ; 

} 

else { 

print LOG "!!! In print_bills: in SPONSOR section: 
keypeopleHash{ $sponsor_val } DOES NOT EXIST !\n"; 

print " <sponsor>$HoH{$bill} {$f ield}</sponsor>\n" ; 

} 

} 

} 

# 

elsif ( $field eq "COSPONSORS" ) { 

my $cosponsors = $HoH{$bill} {$f ield} ; 
if ( $cosponsors =- m/.+:/ ) { 

chop $cosponsors; 
} #=== COMMENT if you want COSPONSOR=NONE to disappear 

print " <cosponsors>\n" ; 
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if ( $items =- in/(.+) \((.+)\)/ ) { 
$co_name = $1; 
$co_amend = $2 ; 
my $cosponsor_val = $co_naine; 
my $val = " " ; 
my $print_f ield; 
$cosponsor_val =- s/ //g; 
$cosponsor_val = Ic $cosponsor_val 
if ( exists ( $keypeopleHash{ $cosponsor val } ) ) 

{ 

$print_field = $keypeopleHash{ $cospoiisor_val 

} { nameURI } ; 

print " <co_name a- 

date=\" $co_amend\ " >$print_f ield</co_name>\n" ; 

} 

else { 

print LOG "!!! In print_bills: in COSPONSORS 
section: keypeopleHash{ $cosponsor_val } DOES NOT EXIST! \n"; 

print " <co_nanie a- 

date=\ " $co_amend\ " >$co_name</co_name>\n" ; 

} 

} 

else { 

my $cosponsor_val = $items; 

my $val = " " ; 

my $print_f ield; 

$cosponsor_val =- s/ //g; 

$cosponsor_val = Ic $cosponsor_val ; 

if ( exists ( $keypeopleHash{ $cosponsor val } ) ) 

{ 

$print_field = $keypeopleHash{ $cosponsor_val 

} { nameURI } ; 

print " 

<co_name>$print_field</co_name>\n" 
} 

else { 

print LOG "!!! In print_bills: in COSPONSORS 

section: keypeopleHash{ $cosponsor_val } DOES NOT EXIST !\n"; 

print " 

< c o_name >$items</c o_name > \ n " 

} 

} 

print " </cosponsor>\n" ; 

} 

print " </cosponsors>\n" ; 
# ) = = = UNCOMMENT if you want COSPON-SOR=NONE to disappear 

} 

elsif ( $field eq "OFFICIAL_TITLE" ) { 
print " 

<official_title>$HoH{$bill}{$f ield}</official_title>\n"; 
} 

elsif ( $field eq " LATEST_STATUS " ) { 
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elsif ( $field eq "ABSTRACT" ) { 

print " <abstract>$HoH{$bill} {$f ield}</abstract>\n" ; 

} 

elsif ( $field eq "COMMITTEE" ) { 

my $len = $#{ $HoH{ $bill } {COMMITTEE} } ; 
print " <comraittees>\n" ; 
foreach my $i (0 . . $len ) { 

<committee>$HoH{$bill}{$f ield} [$i] </comtnittee>\n" ; 

} 

print " </committees>\n" ; 

} 

elsif ( $f ield eq "SUBMITTED_FOR" ) { 

<subTnitted_for>$HoH{$bill} {$f ield}</submitted_for>\n" ; 
} 

elsif { $field eq "CONGRESSIONAL_RECORD" ) { 

< congress ional_record>$HoH{$bill} { $f ield} </congressional_record>\ni' ,- 
} 

# 

elsif ( $field eq "SraMITTED_BY" ) { 

my $submitted_by_val = $HoH{ $bill } { $f ield} ; 
my $val = " " ; 
my $print_f ield; 

$submitted_by_val =- s/ //g; 
$submitted_by_val = Ic $submitted_by_val ; 
if ( exists ( $keypeopleHash{ $submitted_by_val } ) ) { 
$print_field = $keypeopleHash{ $siabmitted_by_val }{ 

nameTOI } ; 

print " 

<submitted_by>$print_f ield</ submit ted_by>\n" ; 
} 

else { 

print LOG "!!! In print_bills: in SUBMITTEED_BY 
section: keypeopIeHash{ $submitted_by_val } DOES NOT EXIST! \n"; 
print " 

<siabTnitted_by>$HoH{ $bill } { $f ield} </submitted_by>\n" ; 
} 

} 

elsif { $field eq "STATEMENT_OF_PURPOSE" ) { 

<statement_of_purpose>$HoH{$bill} { $f ield} </statement_of_purpose>\n" ; 
} • 

elsif ( $field eq "DIGEST" ) { 

print •' <digest>$HoH{$bill}{$field}</digest>\n"; 

} 

else { 

print LOG "M! WRONG TAG: $f ield ! ! ! in sub: print_bills\n" ; 

} 

} 
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my $len = $#{$arg_table} + 1; 
print "$arg_table: COlINT=$len\n" ; 
for my $i (0 .. $# { $arg_table } ) { 

if ( ! ($i % $MORE_SIZE ) && 1 ($i == 0) ) { 
my $percent = ( ($1+1) / $leii ) * 100; 
print "--More--"; 
printf( "{%ld)", $percent ) ,- 
print "\% -- q: to quit\n"; 
readCSTDIN, $buff, 1) ; 



for ( my $1 = 0; ; read(STDIN, $buff, 1) ) 
if { $i == 0 ) { @arr=""; } 
$arr[$i++] = $buff; 
if { $buff eq "\n" ) { 

$inputstr = join '', @arr; 

chop $ input St r; 

goto NEXT; 

} 

$buff = ""; 

} 

} 

NEXT: 

if( $inputstr eg "q" ) ( goto END_PRINT_TABLE; 



ray $aref = ${$arg_table} [$i] ; 
my $n = ®$aref - 1; 
printf ( "\t%5d: " , $i ) ; 
for my $j ( 0 . . $n ) { 

print "\t$aref->E$j] 

} 

print "\n"; 

} 

END_PRINT_TABLS : ; 

} 

sub print_hash { 

my $arg_hash = $_[0] ; 



no strict; 



my $buff = ""; 
my @arr = ' " " ; 
my $inputstr = 



my $len = scalar keys % { $arg_hash} ; 
print "$arg_hash Hash: COUNT=$len\n" ; 
my $i = 0; 

foreach my $key{ sort keys %{$arg_hash} ) { 

if ( ! {$i % $MORE_SIZE ) && ! ($i == 0) ) { 
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■ ( my $i = 0; ; read(STDIN, $buff, 1) ) 
if ( $i == 0 ) { ®arr=""; } 
$arr[$i++] = $buff; 
if ( $buff eq "\n" ) { 

$inputstr = join ®arr; 

chop $inputstr; 

goto NEXT2; 

} 

$buff = ""; 



NEXT2 : 

if ( $inputstr eq "q" ) { goto END_PRINT_HASH ; 
$i++; 

print "\t*$k:ey*\n" ; 

) 

E1ID_PRINT_HASH: ; 



#<p fontname="Courier New" fontsize="20"></p>

#<p align="center" fontname="Courier New" fontsize="28" bold="on">UNITED STATES SENATE</p>

#<p align="center" fontname="Courier New" fontsize="28" bold="on"></p>

#<p align="center" fontname="Courier New" fontsize="28" bold="on"></p>

#<p align="center" fontname="Courier New" fontsize="28" bold="on"></p>

#<p align="center" fontname="Courier New" fontsize="28" bold="on"></p>



#<p align="left" fontname="Courier New" fontsize="48" bold="on"></p>

#<p align="left" fontname="Courier New" fontsize="48" bold="on"></p>

#<p align="left" fontname="Courier New" fontsize="48" bold="on">LEGISLATIVE ACTIVITIES</p>

#<p align="left" fontname="Courier New" fontsize="48" bold="on"></p>

#<p align="left" fontname="Courier New" fontsize="48" bold="on"></p>

#<p align="left" fontname="Courier New" fontsize="36" bold="on">THE HONORABLE</p>

#<p align="left" fontname="Courier New" fontsize="36" bold="on">PAUL S. SARBANES</p>

#<p align="left" fontname="Courier New" fontsize="36" bold="on">OF MARYLAND</p>

#<p align="left" fontname="Courier New" fontsize="36" bold="on"></p>

#<p align="left" fontname="Courier New" fontsize="28" bold="on">FOR THE PERIOD</p>

#<p align="left" fontname="Courier New" fontsize="28" bold="on">JANUARY 1999 TO MARCH 31 2000</p>

#<p align="left" fontname="Courier New" fontsize="28" bold="on"></p>

#<p align="left" fontname="Courier New" fontsize="48" bold="on"></p>
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#<p fontname= "Courier New" 


fontsize="20' 


' bold=" 


on"></p> 




#<p f ontnaTne="Courier New" 


font size 


5="20' 


' bold=" 


on"></p> 




#<p f ontname="Courier New" 




5="20' 


' bold=" 


on"></p> 




#<p f ontname="Courier New" 


font size 


i="2Q' 


' bold=" 


on"></p> 




#<p fontnaine= "Courier New" 




5="20' 


• bold=" 


on"></p> 




#<p align="left" foritname=' 






fontsiz 


e="20" bold=' 


'on" >Prepared 


by:</p> 












#<p align="left" foiitname=' 


" Courier 


New" 


fontsiz 


e="20" bold=' 




Computer Center</p> 












#<p align="left" fontname=' 


" Courier 




fontsiz 


e="20" bold=' 


'on">Office of 


Sergeant at Arms</p> 












#<p align="left" fontname=' 








e="20" bold=' 




#<p align="left" fontname=' 








e="20" bold=' 


'on" >Committee 


Rules and Administration</p> 











sub process_header { 

while ( $line = <SEN106> ) { 
$line_nuraber++ ; 

if ( $line =- m/<p align=. +>(.+) <\/p>/ ) { 
$h4 = $1; 

if ( $line — m/THE HONORABLE/ ) { 

$line = <SEN106>; 
$ line_number++ ; 

$line =- m/<p align= .+>(.+) <\/p>/ ; 
$senNAME= $1; 

my $first_line = $line_nuniber ; 

$line = <SEN106>; 

$ 1 ine_number ++ ,- 

$line =- m/OF (.4-)<\/p>/; 

$state = $1; 

my ®senlist = split (/ /, $senNAME) ,- 
my $firstname = $senlist [0] ; 
my $lastname = $senlist [$#senlist] ; 
my $middlepart = " " ; 

foreach my $i (1 .. ( $#senlist -1) ) { 
$middlepart .= "$senlist [$i] " ; 

} 

chop $middlepart ; 
$len = $#senator; 

$senator[ $len + 1 ] = [ ($senid, $first_line, $firstname, 
$middlepart, $lastname, $state) ] ; 



$line = <SEN106>; 
$Iine_number++ ; 
$line = <SEN106>; 
$ line_number++ ; 
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$line = <SEN10e>; 
$ 1 ine_nuTnber + + ; 

$line = <SEN106>; 
$ 1 ine_nuitiber+ + ; 

while ( $line = <SEN106> ) { 
$1 ine_nuTnber++ ; 

if ( $line =- m/<p align=/ ) { 
goto BEGIN_Prepared_by; 

} 

} 

BEGIN_Prepared_by: 

my $first_line2 = $lin 
$line =- m/>{.+)<\/p>/ 
my $cumulative_content 

$line = <SEN106>; 
$ 1 ine_numbe r + + ; 
$line =- m/>(.+)<\/p>/ 
$cumulative_content .= 

$line = <SEN106>; 
$line_number++ ; 
$line =- m/>(.+)<\/p>/ 
$cumulative_content .= 

$line = <SEN106>; 
$ 1 ine_number++ ; 
$line =- m/>(.+)<\/p>/ 
$cumulative_content .= 

$line = <SEN106>; 
$1 ine_number++ ; 
$line =~ m/>(.+)<\/p>/ 
$cumulative_content . = 

my $ov = $senid . "_" 

$len = $#pr,epared_by; 
$prepared_by E $len + 1 ] = E C$senid, $f irst_line2 , 
$cumulative_content) ] ; 
} 

$h4 = $1; 

} 

elsif ( .$line =- m/<\/section>/ ) { 
goto PHI; 

} 

} 

PHI: ; 

if ( $filename eq "D_1_LARI_S272_106 .xml" ) { 



,e_number ; 
= "$1 

"$1 "; 

'"$1 "; 

'"$1 "; 

. " $line_number" ; 
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while ( $line = <SEN105> > { 
$line_nuniber++ ; 

if ( $line =- m/<\/section>/ ) { 
goto END_process_header; 

} 

} 

END_process_header : ; 



# <p><string italic="off" hidden="on">SECTION VII. SUBJECT INDEX</string>ACADEMIC PERFORMANCE&tab; S.7, S.514, S.564</p>

# <p>ACCESS TO HEALTH CARE&tab; S.6, S.1678, S.1690</p>

# Legacy subject-index scanner: walks the subject-index paragraphs on SEN106
# and parses them, but stores nothing.
#
# Reads:  the global $line (expected to hold the first index paragraph) and
#         the SEN106 filehandle.
# Writes: the globals $line and $line_number ($line_number is advanced once
#         per line actually read).
# Exits:  returns normally when SEN106 is exhausted; jumps to the THEEND
#         label (defined outside this sub) on the first line that is neither
#         an index entry nor an empty <p></p> paragraph.
#
# NOTE(review): the scanned listing shows blanks inside the patterns (after
# "&tab;" and between the capture groups). They are treated as scanning
# artifacts here, because a literal blank in front of the trailing-comma
# group could never match a line ending in ",</p>". Confirm against raw data.
sub process_index_old {

    # The first entry shares its paragraph with the hidden section title.
    $line =~ m/<p>.+<\/string>(.+)&tab;(.+)<\/p>/;
    my $subject  = $1;
    my $bill_seq = $2;

    # Sample entries, including a bill list continued on a second line:
    #<p align="right">Administrative procedure--Department of Health and Human Services&tab; S.331, S.1327</p>
    #<p>AGED&tab; S.10, S.51, S.331, S.391, S.472, S.718, S.784, S.792,</p>
    #<p align="right"> S.1023, S.1074, S.1142, S.1327, S.1499, S.1678, S.1760</p>

  N3: while ( $line = <SEN106> ) {
        $line_number++;

        if ( $line =~ m/<p.*>(.+)&tab;(.+\d)(.*)<\/p>/ ) {
            my $subject  = $1;
            my $bill_seq = $2;
            my $comma    = $3;

            # IS THERE A CONTINUATION...
            # A trailing comma means the bill list continues on the next
            # line. $comma is updated in place (not re-declared), so the
            # loop condition sees the continuation line's own terminator.
            while ( $comma eq "," ) {
                my $buf = <SEN106>;
                last unless defined $buf;    # EOF inside a continuation
                $line_number++;

                # A line that is not a continuation ends the entry instead
                # of re-using stale capture values.
                last unless $buf =~ m/<p.*>(.+\d)(.*)<\/p>/;
                $bill_seq = $1;
                $comma    = $2;
            }
        }

        #<p></p>
        elsif ( $line =~ m/<p><\/p>/ ) {
            # Empty paragraph between entries: nothing to do.
        }
        else {
            # First non-index line: hand control back to the caller's label.
            goto THEEND;
        }
    }
}

# <p><string italic="off" hidden= "on" >SECTION VII . SUBJECT 

' INDEX</string>ACADEMIC PERPORMANCE&tab; S.7, S.514, S.564</p> 
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#<p align="left" fontname= "Courier New" f ontsize="20 " bold= "on" >Committee on 

Rules and Administration</p> 

#</ sections 

#<section> 

#<header> 

#<p align= "center" fontname= "Courier New" fontsize="20" 

bold="on"><f ield><fldinst> PAGE </f Idinstxf Idrsltxstring charstyname= " " 

# fontname= "Courier New" f ontsize= "20 " 
bold= " on">2</string></fldrslt></f ield></p> 

#<p align="right " fontname=" Courier New" f ontsize="20 " bold="on"> 

# <string fontname=" Courier New" f ontsize="20 " boId="on" >Lincoln D. 
Chaf ee</ string></p> 

#<p align="left" fontname= "Courier New" f ontsize= "20 " bold= "on" >< string 
fontname=" Courier New" f ontsize="20" bold="on"> 

# SUBJECT INDEX TO SPONSORED AND COSPONSORED MEASURES AND 
AMENDMENTS </ s t r i ng ></ p > 

#<p align="left" fontname= "Courier New" f ontsize= "20 " bold= "of f " ></p> 
#</header> 

#<p>ACCESS TO HEALTH CARE&tab; S.494</p> 
#<p>ACCIDENT PREVENTION&tab; S.149, S.936</p> 

sub process_index { 

$line =- m/<p> . +<\/string> ( .+) &tab; (.+)<\/p>/; 
my $ subject = $1; 
my $bill_seq = $2; 

tny ®bill_list = split( /,/, $bill_seq ) ; 

my $sub = $ subject ; 
$sub =- s/ /_/g; 
$ti_value{ $sub } = " " ; 

foreach my $item (@bill_list) { 
$item =- s/\s*//g ; 

my $len = $#topic_index; 

$topic_index[ $len +1 ] = [ ($senid, $line_number , $sub, 

$item) ] ; 

} 

#<p align= "right ">Administrative procedure- -Department of Health and Human 
Services&tab; S.331, S.1327</p> 

#<p>AGED&tab; S.IO, S.51, S.331, S.391, S.472, S.718, S.784, S.792,</p> 

#<p align=" right "> S.1023, S.1074, S.1142, S.1327, S.1499, S.1678, S.1760</p> 

N3: while ( $line = <SEN106> ) { 
$ 1 i ne_numbe r + + ; 

if ( $line =~ m/<p.*>(.+)&tab; (.+\d) (.*)<\/p>/ ) { 
my $subject = $1; 
my $biH_seq = $2; 
my $comma = $3; 

my @bill_list = split ( /,/, $bill_seq ) ; 
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# IS THERE A CONTINUATION. . . 
while ( $comma eq " , " ) { 

my $buf = <SEN106>; 

$line_number++ ; 

$buf=- m/<p.*>(.+\d) (.*)<\/p>/; 
my $bill_seq = $1; 
my $comma = $2; 

my ®bill_list = split ( /,/, $bill_seq ) ,- 
foreach my $item (@bill_list) { 

$item =~ s/\s*//g ; 

my $len = $#topic_index; 

$topic_index [ $len +1 ] = [ {$senid 
$1 ine_nuraber, $subject, $itera) ] ; 

} 

if ( $comma eq ) { 
goto N3 ; 

} 

} 

} 

#<p></p> 

elsif ( $line =~ m/<p><\/p>/ ) { 

) 

else { 

goto THEEND; 
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<!ELEMENT 
    SLA_collection = (senate_file*)>
<!ELEMENT senate_file = (filename, header_page?, section*, subject_index?)>
<!ELEMENT header_page = (senator?, report_period?, prepared_by?)>
<!ELEMENT senator = (first_name?, middle_part?, last_name?, state?)>
<!ELEMENT reportdate = (start_date?, end_date?)>
<!ELEMENT section = (sec_number, sec_name, bar*)>
<!ELEMENT bar = (bill | resolution | amendment)>
<!ELEMENT resolution = (joint_resolution, concurrent_resolution, simple_resolution)>
<!ELEMENT bill = (bar_id, date_introduced, sponsor?, cosponsors?, official_title,
                  (latest_status | status_actions), abstract, committee?)>
<!ELEMENT amendment = (bar_id, date_introduced, submitted_for, congressional_record,
                       sponsor?, submitted_by?, cosponsors,
                       statement_of_purpose, (latest_status | status_actions),
                       abstract)>
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# ! /usr/local/bin/perl -w 
use strict; 

# caccf_xml FILE REC_SIZE 
my SrEC_SIZE = 164; 

die "*** Record size must be 164!\n" if ( $ARGV[1] != 164 ); 

my $rec_count = 0; 
my $rec; 



opendN, "< $ARGV[0]") || die "can't read from: $1"; 
$fsize = -s IN; 



print "*** WARNING: file s: 
($REC_SIZE) \n" 

if { $fsize % $REC_SIZE 



ultiple of record size 



open(OUT,"> $ARGV [0] . xml " ) || die "can't write to: $!" 
open(LOG,"> $ARGV [0] .xml . log" ) || die "can't write to: 



print O 

while ( 
print 
print 
print 
print 
print 
print 

if 
print 
print 
print 



<caccf_records>\n" 



pri 

print 

print 

print 

print 

print 
print 
print 



read (IN, $: 
OUT "<rec 
OUT "ms=\ 
OUT "cc=\ 
OUT "tc=\ 
OUT "rn=\ 
LOG "*** 
c_count : \l 
(substr ($re( 
OUT "na=\" 
OUT "dp=\" 
OUT "sn=\" 
OUT "mg=\" 
OUT 
OUT 
OUT 
OUT 
OUT 
OUT 
OUT 
OUT 
OUT 
OUT 
OUT 
OUT 
OUT 



'pg=\ 
'd4=\ 
'hc=\ 
'hs=\ 

'db=\ 
'rc=\ 
'ai = \ 
■ra=\ 
're=\ 
■le=\ 
'ma=\ 
'se=\ 



ec, $REC_ 
no=\"", ■ 
" , substr 
" , substr 
" , substr 
" , substr 
ERROR, 
[ [$rec] ] \] 
ec, 10, 1) 
subst 
substr 
substr 

substr 
substr 
substr 
substr 

substr 
subst 
subst 

subst 
substr 
substr 
substr 



_SIZE) == $REC_SIZE) { 
-+ $rec_count, "\" "; 
:$rec,0,l) ,"\" "; 
($rec, 1,2) , "\" "; 
{$rec,3,2) , "X" "; 
($rec,5,5) , "\" "; 

(substr ($rec, 10, 1) ) , 
,n\n" 

($rec, 11,28) , "\" " ; 
($rec,39,4) , "\" 
($rec,43,9) , "\" 
($rec, 52,4) , "\" 
($rec, 56,2) , "\" 
($rec, 58, 8) , "\" 
($rec,66,20) , "\ 
($rec, 86,2) , "\" 
($rec, 88,5) , "\" 
($rec, 93, 8) , "\" 
($rec, 101, 1) 
($rec, 102, 1) 
($rec, 103, 1) 
{$rec, 104, 2) 
($rec, 106,2) 
($rec, 108, 1) 
($rec, 109, 1) , "\ 



nstead of blank in #11, 



"\" 
"\" 
"\" 
"\" 
"\" 
"\" 
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#j ! /usr/local/bin/perl -w 
use strict; 

# caccf2oracle FILE REC_SIZE

# Fixed length, in bytes, of one input record.
my $REC_SIZE = 164;

# The record size has to be passed explicitly as the second argument and
# must equal $REC_SIZE; a missing argument is rejected the same way.
die "*** Record size must be 164!\n"
    if ( !defined $ARGV[1] || $ARGV[1] != $REC_SIZE );

my $rec_count = 0;    # records read so far; quoted in the diagnostics
my $fsize;            # size of the input file in bytes

# Two-digit month number => three-letter month abbreviation, as indexed by
# mm_dd_yy() and yymmdd() with a two-digit capture.
# NOTE(review): the keys were lost in the scanned listing and are
# reconstructed from that usage -- confirm against the original script.
my %month = (
    "01" => "JAN",
    "02" => "FEB",
    "03" => "MAR",
    "04" => "APR",
    "05" => "MAY",
    "06" => "JUN",
    "07" => "JUL",
    "08" => "AUG",
    "09" => "SEP",
    "10" => "OCT",
    "11" => "NOV",
    "12" => "DEC" );



# Turn an "MM/DD/YY" date into the SQL expression TO_DATE('DD-MON-19YY').
#
# Argument: $a_date - text containing a date written as MM/DD/YY.
# Returns:  the TO_DATE(...) expression, or the string "NULL" when the text
#           holds no such date or its month number is not a key of %month.
#           In the NULL case an error naming the current record number
#           ($rec_count) is printed.
# The century is always written as 19.
sub mm_dd_yy {
    my ($a_date) = @_;

    # $1 = month, $2 = day, $3 = two-digit year. The month must also be a
    # known key, otherwise the month name would interpolate as empty.
    if ( $a_date =~ m|([0-9]{2})/([0-9]{2})/([0-9]{2})|
        && exists $month{$1} )
    {
        return "TO_DATE('$2-$month{$1}-19$3')";
    } else {
        print "*** ERROR Rec #$rec_count, mm_dd_yy, not a date >>>$a_date<<<\n";
        return "NULL";
    }
}

# Turn a "YYMMDD" date into the SQL expression TO_DATE('DD-MON-19YY').
#
# Argument: $a_date - text containing six consecutive digits, read as
#           two-digit year, month and day.
# Returns:  the TO_DATE(...) expression, or the string "NULL" when the text
#           holds no such date or its month number is not a key of %month.
#           In the NULL case a warning naming the current record number
#           ($rec_count) is printed.
# The century is always written as 19.
sub yymmdd {
    my ($a_date) = @_;

    # $1 = two-digit year, $2 = month, $3 = day. The month must also be a
    # known key, otherwise the month name would interpolate as empty.
    if ( $a_date =~ m|([0-9]{2})([0-9]{2})([0-9]{2})|
        && exists $month{$2} )
    {
        return "TO_DATE('$3-$month{$2}-19$1')";
    } else {
        print "*** WARNING Rec #$rec_count, yymmdd, not a date >>>$a_date<<<\n";
        return "NULL";
    }
}

sub escapeQuote { 

my ($a_string) = ®_ ; 

if ($a_string =- s/\'/\'\'/g) { 

print "*** NOTE Rec #$rec_count, quote escaped >>>$a_string<<<\n" ; 

} 

return $a_string; 
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open(OUT,"> $ARGV[0] .sql") || die "can't write to: 
open(LOG,"> $ARGV [0] . sql . log" ) || die "can't write 



# Read fixed-length records from IN and write one
# "insert into CACCF values (...);" statement per record to OUT.
# The loop stops at the first read that does not return a full record.
# Each field is cut out of the record by byte offset and length; the
# trailing comment is the field's short tag. Text fields that go through
# escapeQuote() get their single quotes doubled; date fields are converted
# by mm_dd_yy() / yymmdd() and written unquoted.
# NOTE(review): the "cc" and "sn" statements were missing from the scanned
# listing and are restored from the offsets used by the caccf_xml script.
while ( read( IN, $rec, $REC_SIZE ) == $REC_SIZE ) {
    print OUT "insert into CACCF values (";
    print OUT "'", ++$rec_count, "',";                            # internal
    print OUT "'", substr( $rec, 0, 1 ), "',";                    # ms
    print OUT "'", substr( $rec, 1, 2 ), "',";                    # cc
    print OUT "'", substr( $rec, 3, 2 ), "',";                    # tc
    print OUT "'", substr( $rec, 5, 5 ), "',";                    # rn
    # Byte 10 is not exported.
    print OUT "'", escapeQuote( substr( $rec, 11, 28 ) ), "',";   # na
    print OUT "'", substr( $rec, 39, 4 ), "',";                   # dp
    print OUT "'", substr( $rec, 43, 9 ), "',";                   # sn
    print OUT "'", substr( $rec, 52, 4 ), "',";                   # mg
    print OUT "'", substr( $rec, 56, 2 ), "',";                   # pg
    print OUT mm_dd_yy( substr( $rec, 58, 8 ) ), ",";             # dd
    print OUT "'", escapeQuote( substr( $rec, 66, 20 ) ), "',";   # hc
    print OUT "'", substr( $rec, 86, 2 ), "',";                   # hs
    print OUT "'", substr( $rec, 88, 5 ), "',";
    print OUT mm_dd_yy( substr( $rec, 93, 8 ) ), ",";             # db
    print OUT "'", substr( $rec, 101, 1 ), "',";                  # rc
    print OUT "'", substr( $rec, 102, 1 ), "',";                  # ai
    print OUT "'", substr( $rec, 103, 1 ), "',";                  # ra
    print OUT "'", substr( $rec, 104, 2 ), "',";                  # re
    print OUT "'", substr( $rec, 106, 2 ), "',";                  # le
    print OUT "'", substr( $rec, 108, 1 ), "',";                  # ma
    print OUT "'", substr( $rec, 109, 1 ), "',";                  # se
    print OUT "'", substr( $rec, 110, 1 ), "',";                  # ci
    print OUT "'", substr( $rec, 111, 1 ), "',";                  # pp
    print OUT yymmdd( substr( $rec, 112, 6 ) ), ",";              # dt
    print OUT "'", substr( $rec, 118, 1 ), "',";                  # lr
    print OUT "'", substr( $rec, 119, 3 ), "',";                  # br
    print OUT "'", substr( $rec, 122, 2 ), "',";                  # ag
    print OUT "'", substr( $rec, 124, 1 ), "',";                  # sc
    print OUT "'", escapeQuote( substr( $rec, 125, 29 ) ), "',";
    print OUT "'", escapeQuote( substr( $rec, 154, 2 ) ), "',";
    print OUT "'", escapeQuote( substr( $rec, 156, 2 ) ), "',";
    print OUT "'", substr( $rec, 158, 2 ), "',";                  # mc
    print OUT "'", substr( $rec, 160, 2 ), "',";                  # pr
    # NOTE(review): the scanned listing reads offset 152 here; 162 is used
    # because 152 would overlap the 125,29 field and 162,2 is the only
    # range left in a 164-byte record. Confirm against the original.
    print OUT "'", substr( $rec, 162, 2 ), "'";                   # fl
    print OUT ");\n";
}





print LOG "[$ARGV[0]: read " , $rec_count*$REC_SIZE, 
records x $REC_SIZE] \n" ; 



$rec_cc 



int LOG "*** WARNING: file size = $fsize\n 
if ( $rec_count*$REC_SIZE 1= $fsize ) ; 
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TRANSFORMING REPRESENTATION 
OF DATA OBJECTS INTO SELF- 
DESCRIBING, INFRASTRUCTURE- 
INDEPENDENT REPRESENTATION OF 
DATA OBJECTS 



VERIFYING TRANSFORMED DATA 
OBJECTS USING KNOWLEDGE 
RELEVANT TO COLLECTION 



ARCHIVING SELF-DESCRIBING, INFRASTRUCTURE- 
INDEPENDENT REPRESENTATION OF DATA OBJECTS 
WITH SELF-DESCRIBING, INFRASTRUCTURE- 
INDEPENDENT REPRESENTATION OF LOGICAL 

STRUCTURE OF COLLECTION AND SELF- 
DESCRIBING, INFRASTRUCTURE-INDEPENDENT 
REPRESENTATION OF KNOWLEDGE 
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4100 , 



RETRIEVING FROM ARCHIVE SELF- 
DESCRIBING, INFRASTRUCTURE- 
INDEPENDENT REPRESENTATION OF 
LOGICAL STRUCTURE OF COLLECTION 



RETRIEVING FROM THE ARCHIVE A 
SELF-DESCRIBING, INFRASTRUCTURE- 
INDEPENDENT REPRESENTATION OF 
KNOWLEDGE RELEVANT TO THE 
COLLECTION 



4104 



CREATING QUERY-ABLE MECHANISM 
IN ACCORDANCE WITH LOGICAL 
STRUCTURE OF COLLECTION 



RETRIEVING FROM THE ARCHIVE A 
SELF-DESCRIBING, INFRASTRUCTURE- 
INDEPENDENT REPRESENTATION OF 
DATA OBJECTS 



4108 



VERIFYING THAT THE DATA OBJECTS 
ARE CONSISTENT WITH THE 
KNOWLEDGE RELEVANT TO THE 
COLLECTION 



4110 



FIGURE 41 A 
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4112 RETRIEVING FROM ARCHIVE SELF- 

\. DESCRIBING, INFRASTRUCTURE- 
^ INDEPENDENT REPRESENTATION OF 
PRESENTATION MECHANISM FOR ONE 
OR MORE DATA OBJECTS 



RETRIEVING ONE OR MORE DATA 
OBJECTS FROM QUERY-ABLE 
MECHANISM 



4116 



VERIFYING THAT THE ONE OR MORE 

DATA OBJECTS ARE CONSISTENT 
WITH KNOWLEDGE RELEVANT TO THE 
COLLECTION 



4118 



PRESENTING THE ONE OR MORE 
DATA OBJECTS USING THE 
PRESENTATION MECHANISM 



FIGURE 41 B 
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4200. 



RETRIEVING FROM ARCHIVE SELF- 
DESCRIBING, INFRASTRUCTURE- 
INDEPENDENT REPRESENTATION OF 
KNOWLEDGE RELEVANT TO THE 
COLLECTION 



^ USING THE KNOWLEDGE TO VALIDATE 
THE COLLECTION 



FIGURE 42 



PERSISTENT ARCHIVES 
Inventors: Reagan W. Moore, et al. 
Howrey Docket No. 02737.0004.NPUS01 
107/118 



ACCESSION 
TEMPLATE 



CLOSURE 
CONCEPT/ATTRIBUTE 



ATTRIBUTE 
INVERSE INDEXING 



ATTRIBUTE 
SELECTION 




ATTRIBUTE 
TAGGING 




OCCURRENCE 
TAGGING 







FIGURE 43 



4400 RETRIEVING FROM ARCHIVE SELF- 

\ DESCRIBING, INFRASTRUCTURE- 

^ INDEPENDENT, OR EXECUTABLE 
REPRESENTATION OF 
TRANSFORMATION PROCEDURE 
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4402 EXECUTING THE PROCEDURE TO 

■\ TRANSFORM DATA RECORDS INTO A 
^ SELF-DESCRIBING, INFRASTRUCTURE- 
INDEPENDENT REPRESENTATION OF 
DATA OBJECTS 



FIGURE 44A 



RETRIEVING FROM ARCHIVE SELF- 
DESCRIBING, INFRASTRUCTURE- 
INDEPENDENT, OR EXECUTABLE 

REPRESENTATION OF 
TRANSFORMATION PROCEDURE 



4406 



RETRIEVING FROM ARCHIVE SELF- 
DESCRIBING, INFRASTRUCTURE- 
INDEPENDENT REPRESENTATION OF 
DATA OBJECTS 



EXECUTING THE PROCEDURE TO 
TRANSFORM SELF-DESCRIBING, 
INFRASTRUCTURE-INDEPENDENT 
REPRESENTATION OF DATA OBJECTS 

INTO A FORM CAPABLE OF BEING 
INSTANTIATED ONTO A QUERY-ABLE 
MECHANISM 



FIGURE 44B 
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RETRIEVING FROM ARCHIVE SELF- 
DESCRIBING, INFRASTRUCTURE- 
INDEPENDENT, OR EXECUTABLE 

REPRESENTATION OF 
TRANSFORMATION PROCEDURE 



RETRIEVING FROM ARCHIVE SELF- 
DESCRIBING, INFRASTRUCTURE- 
INDEPENDENT REPRESENTATION OF 
DATA OBJECTS 



EXECUTING THE PROCEDURE TO 
TRANSFORM SELF-DESCRIBING, 
INFRASTRUCTURE-INDEPENDENT 
REPRESENTATION OF DATA OBJECTS 
INTO OCCURRENCES OF ATTRIBUTE 
OR ELEMENT VALUES 



FIGURE 44C 



4502 



FORMING FROM THE TAGGED DATA 
RECORDS OCCURRENCES OF 
ATTRIBUTE OR ELEMENT VALUES 



FIGURE 45 
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AT LEAST ONE REPRESENTATION OF COLLECTION 



AT LEAST ONE SELF-DESCRIBING, INFRASTRUCTURE- 
INDEPENDENT, OR EXECUTABLE SPECIFICATION OF 
ONE OR MORE TRANSFORMATIONS RELEVANT TO 
COLLECTION 



AT LEAST ONE SELF-DESCRIBING, INFRASTRUCTURE- 
INDEPENDENT, OR EXECUTABLE SPECIFICATION OF 
ONE OR MORE RULES RELEVANT TO THE 
COLLECTION 



SELF-DESCRIBING, INFRASTRUCTURE-INDEPENDENT 
REPRESENTATION OF PRESENTATION MECHANISM 
(OPTIONAL) 



FIGURE 46 
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4700 
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%%% Ruhs for (fELEMBNT X (YZ)) 

false <- P:X, not (P.1):Y.                 % 1st child is not Y
false <- P:X, not (P.2):Z.                 % 2nd child is not Z
false <- P:X, not P[_->_].                 % there are no children
false <- P:X[N->_], not N=1, not N=2.      % there are other children

%%% Rules for <!ELEMENT X (Y|Z)>
false <- P:X[1->A], not A:Y, not A:Z.      % 1st child other than Y or Z
false <- P:X, not P[_->_].                 % there are no children
false <- P:X[N->_], not N=1.               % a non-1st child

%%% Rule for <!ELEMENT X (Y)*>
false <- P:X[_->C], not C:Y.               % a non-Y child
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RETRIEVING FROM ARCHIVE SELF- 
DESCRIBING, INFRASTRUCTURE- 
INDEPENDENT, OR EXECUTABLE 

SPECIFICATION OF ONE OR MORE 
TRANSFORMATIONS 



RETRIEVING FROM ARCHIVE ONE OR 
MORE DATA OBJECTS FROM THE 
COLLECTION 



5004 EXECUTING THE SPECIFICATION TO 

AUTOMATICALLY PLACE THE ONE OR 
^ MORE DATA OBJECTS INTO A FORM 
SUITABLE FOR INSTANTIATION ONTO 
A QUERY-ABLE MECHANISM 



FIGURE 50 



RETRIEVING FROM ARCHIVE SELF- 
DESCRIBING, INFRASTRUCTURE- 
INDEPENDENT, OR EXECUTABLE 

SPECIFICATION OF ONE OR MORE 
RULES 



EXECUTING THE SPECIFICATION TO 
AUTOMATICALLY VALIDATE THE 
COLLECTION 



FIGURE 51A 
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DETERMINING THAT THE 
OCCURRENCES ARE CONSISTENT 
WITH THE RULES ENCODED BY THE 
SPECIFICATION AND ANY VALID 
EXCEPTIONS 



FIGURE 51 B 



RETRIEVING FROM ARCHIVE SELF- 
DESCRIBING, INFRASTRUCTURE- 
INDEPENDENT, OR EXECUTABLE 
SPECIFICATION OF ONE OR MORE 
TRANSFORMATIONS 



RETRIEVING FROM ARCHIVE ONE OR 
MORE DATA OBJECTS FROM THE 
COLLECTION 



EXECUTING THE SPECIFICATION TO 
AUTOMATICALLY PLACE THE ONE OR 
MORE DATA OBJECTS INTO A FORM 
SUITABLE FOR PRESENTATION 



FIGURE 52 
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#...^^^...^..^....^...^.^-_^-^ 

# An excerpt of an example of a Topic Map for the SLA (Senate) 

# Collection. 
# 

# 4 Topics are shown: t1, t2, t3, and t4 of type "SubjectEntry" 

# -> These are actually Subject Index Entries found in the 

# raw data 
# 

# For each topic, there is an occurrence list of locator elements 

# corresponding to the bills that discuss that topic. 

# 

# In addition, topics are related to each other through associations. 

# Here we created two types of associations: 

# <assoc types="CoDiscussedInExactlyOneBill"> 

# <assoc types="CoDiscussedInTwoOrMoreBills"> 

# 

# showing the "degree of connectedness" between two topics. 

# These would be value-added relationships, as they are implicit 

# in the raw data, and discovered by our topic map building 

# routines. 
# 

# Bertram Ludaescher & Richard Marciano ~ March 20, 2001 

# 

<!DOCTYPE topicmap [ 
<!ELEMENT topicmap (topic | assoc)* > 
<!ELEMENT topic (topname | occurs)* > 
<!ATTLIST topic 
id ID #REQUIRED 
types CDATA #IMPLIED 
> 

<!ELEMENT topname (basename, dispname, sortname)> 
<!ELEMENT basename (#PCDATA) > 
<!ELEMENT dispname (#PCDATA) > 
<!ELEMENT sortname (#PCDATA) > 
<!ELEMENT occurs (locator*) > 
<!ELEMENT locator EMPTY > 
<!ATTLIST locator 
role CDATA #REQUIRED 
href CDATA #REQUIRED 
> 

<!ELEMENT assoc (assocrl*) > 
<!ATTLIST assoc 
types CDATA #IMPLIED 
> 

<!ELEMENT assocrl EMPTY > 
<!ATTLIST assocrl 
role CDATA #REQUIRED 
href CDATA #REQUIRED 
> 

]> 



54B 
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<topicmap> 
<topic id="t1" types="SubjectEntry"> 
<topname> 

<basename>Apartment houses</basename> 
<dispname>Apt. Houses</dispname> 
<sortname>APARTMENTHOUSES</sortname> 
</topname> 
<occurs> 

<locator role="DiscussedIn" href="#S.463" /> 
</occurs> 
</topic> 

<topic id="t2" types="SubjectEntry"> 
<topname> 
<basename>Children</basename> 

<dispname>Child.</dispname> 
<sortname>CHILDREN</sortname> 
</topname> 
<occurs> 

<locator role="DiscussedIn" href="#S.300" /> 
<locator role="DiscussedIn" href="#S.463" /> 
<locator role="DiscussedIn" href="#S.1638" /> 
<locator role="DiscussedIn" href="#S.1673" /> 
<locator role="DiscussedIn" href="#S.1709" /> 
<locator role="DiscussedIn" href="#S.Res.125" /> 
<locator role="DiscussedIn" href="#S.Res.258" /> 
</occurs> 
</topic> 

<topic id="t3" types="SubjectEntry"> 
<topname> 
<basename>Welfare</basename> 

<dispname>Welf.</dispname> 
<sortname>WELFARE</sortname> 
</topname> 
<occurs> 

<locator role="DiscussedIn" href="#S.463" /> 
<locator role="DiscussedIn" href="#S.1277" /> 
<locator role="DiscussedIn" href="#S.1709" /> 
<locator role="DiscussedIn" href="#S.Con.Res.28" /> 
<locator role="DiscussedIn" href="#S.Res.125" /> 
<locator role="DiscussedIn" href="#S.Res.260" /> 
</occurs> 
</topic> 
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<topic id="t4" types="SubjectEntry"> 
<topname> 

<basename>Youth employment</basename> 

<dispname>Youth empl.</dispname> 
<sortname>YOUTHEMPLOYMENT</sortname> 
</topname> 
<occurs> 

<locator role="DiscussedIn" href="#S.463" /> 
</occurs> 
</topic> 

<assoc types="CoDiscussedInExactlyOneBill"> 
<assocrl role="DiscussedInSameBill" href="t1" /> 
<assocrl role="DiscussedInSameBill" href="t2" /> 
<assocrl role="DiscussedInSameBill" href="t3" /> 
<assocrl role="DiscussedInSameBill" href="t4" /> 
</assoc> 

<assoc types="CoDiscussedInTwoOrMoreBills"> 
<assocrl role="DiscussedInSameBill" href="t2" /> 
<assocrl role="DiscussedInSameBill" href="t3" /> 
</assoc> 

</topicmap> 



FIGURE 54C 



